From 4d722b0de9ea6b24f405e74eb41d3040104f9b6c Mon Sep 17 00:00:00 2001 From: "kaf24@scramble.cl.cam.ac.uk" Date: Mon, 26 Jul 2004 15:09:53 +0000 Subject: [PATCH] bitkeeper revision 1.1108.17.1 (41051ec1NERNxLF017rAWe7ljBk92w) A better fix for blkdev request merging. Should work for both IDE and SCSI, and do as much merging as possible, and also no need for PIO fallback mode. --- .rootkeys | 1 + linux-2.4.26-xen-sparse/include/asm-xen/pci.h | 25 +- .../include/linux/blkdev.h | 372 ++++++++++++++++++ 3 files changed, 376 insertions(+), 22 deletions(-) create mode 100644 linux-2.4.26-xen-sparse/include/linux/blkdev.h diff --git a/.rootkeys b/.rootkeys index 053bb5cede..39fcdfbc47 100644 --- a/.rootkeys +++ b/.rootkeys @@ -135,6 +135,7 @@ 3f689063nhrIRsMMZjZxMFk7iEINqQ linux-2.4.26-xen-sparse/include/asm-xen/xen_proc.h 40659defgWA92arexpMGn8X3QMDj3w linux-2.4.26-xen-sparse/include/asm-xen/xor.h 3f056927gMHl7mWB89rb73JahbhQIA linux-2.4.26-xen-sparse/include/linux/blk.h +41051ec1m6bJVjZocTG0C0V0O6RsVg linux-2.4.26-xen-sparse/include/linux/blkdev.h 401c0590D_kwJDU59X8NyvqSv_Cl2A linux-2.4.26-xen-sparse/include/linux/sched.h 40a248afgI0_JKthdYAe8beVfXSTpQ linux-2.4.26-xen-sparse/include/linux/skbuff.h 3e5a4e686V0nioX2ZpFf056sgvdiQw linux-2.4.26-xen-sparse/include/linux/sunrpc/debug.h diff --git a/linux-2.4.26-xen-sparse/include/asm-xen/pci.h b/linux-2.4.26-xen-sparse/include/asm-xen/pci.h index 382b7a41de..74ae5ba8b1 100644 --- a/linux-2.4.26-xen-sparse/include/asm-xen/pci.h +++ b/linux-2.4.26-xen-sparse/include/asm-xen/pci.h @@ -145,8 +145,7 @@ static inline void pci_unmap_page(struct pci_dev *hwdev, dma_addr_t dma_address, static inline int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg, int nents, int direction) { - int i, j, nr_pfns; - unsigned long first_pfn; + int i; if (direction == PCI_DMA_NONE) out_of_line_bug(); @@ -160,28 +159,10 @@ static inline int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg, else if (!sg[i].address && !sg[i].page) 
out_of_line_bug(); - if (sg[i].address) { + if (sg[i].address) sg[i].dma_address = virt_to_bus(sg[i].address); - first_pfn = virt_to_phys(sg[i].address) >> PAGE_SHIFT; - nr_pfns = (((unsigned long)sg[i].address & - (PAGE_SIZE-1)) + sg[i].length + PAGE_SIZE - 1) >> - PAGE_SHIFT; - } else { + else sg[i].dma_address = page_to_bus(sg[i].page) + sg[i].offset; - first_pfn = page_to_phys(sg[i].page) >> PAGE_SHIFT; - nr_pfns = (sg[i].offset + sg[i].length + PAGE_SIZE - - 1) >> PAGE_SHIFT; - } - - /* - * Check that we merged physical buffers are also contiguous - * in machine-address space. We try to fail by returning 0. - */ - for (j = 1; j < nr_pfns; j++) { - if ( unlikely(pfn_to_mfn(first_pfn+j) != - (pfn_to_mfn(first_pfn)+j)) ) - return 0; - } } flush_write_buffers(); diff --git a/linux-2.4.26-xen-sparse/include/linux/blkdev.h b/linux-2.4.26-xen-sparse/include/linux/blkdev.h new file mode 100644 index 0000000000..09178c4203 --- /dev/null +++ b/linux-2.4.26-xen-sparse/include/linux/blkdev.h @@ -0,0 +1,372 @@ +#ifndef _LINUX_BLKDEV_H +#define _LINUX_BLKDEV_H + +#include <linux/major.h> +#include <linux/sched.h> +#include <linux/genhd.h> +#include <linux/tqueue.h> +#include <linux/list.h> +#include <linux/mm.h> + +#include <asm/io.h> + +struct request_queue; +typedef struct request_queue request_queue_t; +struct elevator_s; +typedef struct elevator_s elevator_t; + +/* + * Ok, this is an expanded form so that we can use the same + * request for paging requests. 
+ */ +struct request { + struct list_head queue; + int elevator_sequence; + + volatile int rq_status; /* should split this into a few status bits */ +#define RQ_INACTIVE (-1) +#define RQ_ACTIVE 1 +#define RQ_SCSI_BUSY 0xffff +#define RQ_SCSI_DONE 0xfffe +#define RQ_SCSI_DISCONNECTING 0xffe0 + + kdev_t rq_dev; + int cmd; /* READ or WRITE */ + int errors; + unsigned long start_time; + unsigned long sector; + unsigned long nr_sectors; + unsigned long hard_sector, hard_nr_sectors; + unsigned int nr_segments; + unsigned int nr_hw_segments; + unsigned long current_nr_sectors, hard_cur_sectors; + void * special; + char * buffer; + struct completion * waiting; + struct buffer_head * bh; + struct buffer_head * bhtail; + request_queue_t *q; +}; + +#include <linux/elevator.h> + +typedef int (merge_request_fn) (request_queue_t *q, + struct request *req, + struct buffer_head *bh, + int); +typedef int (merge_requests_fn) (request_queue_t *q, + struct request *req, + struct request *req2, + int); +typedef void (request_fn_proc) (request_queue_t *q); +typedef request_queue_t * (queue_proc) (kdev_t dev); +typedef int (make_request_fn) (request_queue_t *q, int rw, struct buffer_head *bh); +typedef void (plug_device_fn) (request_queue_t *q, kdev_t device); +typedef void (unplug_device_fn) (void *q); + +struct request_list { + unsigned int count; + unsigned int pending[2]; + struct list_head free; +}; + +struct request_queue +{ + /* + * the queue request freelist, one for reads and one for writes + */ + struct request_list rq; + + /* + * The total number of requests on each queue + */ + int nr_requests; + + /* + * Batching threshold for sleep/wakeup decisions + */ + int batch_requests; + + /* + * The total number of 512byte blocks on each queue + */ + atomic_t nr_sectors; + + /* + * Batching threshold for sleep/wakeup decisions + */ + int batch_sectors; + + /* + * The max number of 512byte blocks on each queue + */ + int max_queue_sectors; + + /* + * Together with queue_head for cacheline sharing + */ + 
struct list_head queue_head; + elevator_t elevator; + + request_fn_proc * request_fn; + merge_request_fn * back_merge_fn; + merge_request_fn * front_merge_fn; + merge_requests_fn * merge_requests_fn; + make_request_fn * make_request_fn; + plug_device_fn * plug_device_fn; + /* + * The queue owner gets to use this for whatever they like. + * ll_rw_blk doesn't touch it. + */ + void * queuedata; + + /* + * This is used to remove the plug when tq_disk runs. + */ + struct tq_struct plug_tq; + + /* + * Boolean that indicates whether this queue is plugged or not. + */ + int plugged:1; + + /* + * Boolean that indicates whether current_request is active or + * not. + */ + int head_active:1; + + /* + * Boolean that indicates you will use blk_started_sectors + * and blk_finished_sectors in addition to blk_started_io + * and blk_finished_io. It enables the throttling code to + * help keep the sectors in flight to a reasonable value + */ + int can_throttle:1; + + unsigned long bounce_pfn; + + /* + * Is meant to protect the queue in the future instead of + * io_request_lock + */ + spinlock_t queue_lock; + + /* + * Tasks wait here for free read and write requests + */ + wait_queue_head_t wait_for_requests; +}; + +#define blk_queue_plugged(q) (q)->plugged +#define blk_fs_request(rq) ((rq)->cmd == READ || (rq)->cmd == WRITE) +#define blk_queue_empty(q) list_empty(&(q)->queue_head) + +extern inline int rq_data_dir(struct request *rq) +{ + if (rq->cmd == READ) + return READ; + else if (rq->cmd == WRITE) + return WRITE; + else { + BUG(); + return -1; /* ahem */ + } +} + +extern unsigned long blk_max_low_pfn, blk_max_pfn; + +#define BLK_BOUNCE_HIGH ((u64)blk_max_low_pfn << PAGE_SHIFT) +#define BLK_BOUNCE_ANY ((u64)blk_max_pfn << PAGE_SHIFT) + +extern void blk_queue_bounce_limit(request_queue_t *, u64); + +#ifdef CONFIG_HIGHMEM +extern struct buffer_head *create_bounce(int, struct buffer_head *); +extern inline struct buffer_head *blk_queue_bounce(request_queue_t *q, int rw, + struct 
buffer_head *bh) +{ + struct page *page = bh->b_page; + +#ifndef CONFIG_DISCONTIGMEM + if (page - mem_map <= q->bounce_pfn) +#else + if ((page - page_zone(page)->zone_mem_map) + (page_zone(page)->zone_start_paddr >> PAGE_SHIFT) <= q->bounce_pfn) +#endif + return bh; + + return create_bounce(rw, bh); +} +#else +#define blk_queue_bounce(q, rw, bh) (bh) +#endif + +#ifdef CONFIG_XEN +/* Used for buffer merging, where it is imperative we use machine addresses! */ +#define bh_phys(bh) (page_to_bus((bh)->b_page) + bh_offset((bh))) +#else +#define bh_phys(bh) (page_to_phys((bh)->b_page) + bh_offset((bh))) +#endif + +#define BH_CONTIG(b1, b2) (bh_phys((b1)) + (b1)->b_size == bh_phys((b2))) +#define BH_PHYS_4G(b1, b2) ((bh_phys((b1)) | 0xffffffff) == ((bh_phys((b2)) + (b2)->b_size - 1) | 0xffffffff)) + +struct blk_dev_struct { + /* + * queue_proc has to be atomic + */ + request_queue_t request_queue; + queue_proc *queue; + void *data; +}; + +struct sec_size { + unsigned block_size; + unsigned block_size_bits; +}; + +/* + * Used to indicate the default queue for drivers that don't bother + * to implement multiple queues. We have this access macro here + * so as to eliminate the need for each and every block device + * driver to know about the internal structure of blk_dev[]. 
+ */ +#define BLK_DEFAULT_QUEUE(_MAJOR) &blk_dev[_MAJOR].request_queue + +extern struct sec_size * blk_sec[MAX_BLKDEV]; +extern struct blk_dev_struct blk_dev[MAX_BLKDEV]; +extern void grok_partitions(struct gendisk *dev, int drive, unsigned minors, long size); +extern void register_disk(struct gendisk *dev, kdev_t first, unsigned minors, struct block_device_operations *ops, long size); +extern void generic_make_request(int rw, struct buffer_head * bh); +extern inline request_queue_t *blk_get_queue(kdev_t dev); +extern void blkdev_release_request(struct request *); + +/* + * Access functions for manipulating queue properties + */ +extern int blk_grow_request_list(request_queue_t *q, int nr_requests, int max_queue_sectors); +extern void blk_init_queue(request_queue_t *, request_fn_proc *); +extern void blk_cleanup_queue(request_queue_t *); +extern void blk_queue_headactive(request_queue_t *, int); +extern void blk_queue_throttle_sectors(request_queue_t *, int); +extern void blk_queue_make_request(request_queue_t *, make_request_fn *); +extern void generic_unplug_device(void *); +extern inline int blk_seg_merge_ok(struct buffer_head *, struct buffer_head *); + +extern int * blk_size[MAX_BLKDEV]; + +extern int * blksize_size[MAX_BLKDEV]; + +extern int * hardsect_size[MAX_BLKDEV]; + +extern int * max_readahead[MAX_BLKDEV]; + +extern int * max_sectors[MAX_BLKDEV]; + +extern int * max_segments[MAX_BLKDEV]; + +#define MAX_SEGMENTS 128 +#define MAX_SECTORS 255 +#define MAX_QUEUE_SECTORS (4 << (20 - 9)) /* 4 mbytes when full sized */ +#define MAX_NR_REQUESTS 1024 /* 1024k when in 512 units, normally min is 1M in 1k units */ + +#define PageAlignSize(size) (((size) + PAGE_SIZE -1) & PAGE_MASK) + +#define blkdev_entry_to_request(entry) list_entry((entry), struct request, queue) +#define blkdev_entry_next_request(entry) blkdev_entry_to_request((entry)->next) +#define blkdev_entry_prev_request(entry) blkdev_entry_to_request((entry)->prev) +#define blkdev_next_request(req) 
blkdev_entry_to_request((req)->queue.next) +#define blkdev_prev_request(req) blkdev_entry_to_request((req)->queue.prev) + +extern void drive_stat_acct (kdev_t dev, int rw, + unsigned long nr_sectors, int new_io); + +static inline int get_hardsect_size(kdev_t dev) +{ + int retval = 512; + int major = MAJOR(dev); + + if (hardsect_size[major]) { + int minor = MINOR(dev); + if (hardsect_size[major][minor]) + retval = hardsect_size[major][minor]; + } + return retval; +} + +static inline int blk_oversized_queue(request_queue_t * q) +{ + if (q->can_throttle) + return atomic_read(&q->nr_sectors) > q->max_queue_sectors; + return q->rq.count == 0; +} + +static inline int blk_oversized_queue_reads(request_queue_t * q) +{ + if (q->can_throttle) + return atomic_read(&q->nr_sectors) > q->max_queue_sectors + q->batch_sectors; + return q->rq.count == 0; +} + +static inline int blk_oversized_queue_batch(request_queue_t * q) +{ + return atomic_read(&q->nr_sectors) > q->max_queue_sectors - q->batch_sectors; +} + +#define blk_finished_io(nsects) do { } while (0) +#define blk_started_io(nsects) do { } while (0) + +static inline void blk_started_sectors(struct request *rq, int count) +{ + request_queue_t *q = rq->q; + if (q && q->can_throttle) { + atomic_add(count, &q->nr_sectors); + if (atomic_read(&q->nr_sectors) < 0) { + printk("nr_sectors is %d\n", atomic_read(&q->nr_sectors)); + BUG(); + } + } +} + +static inline void blk_finished_sectors(struct request *rq, int count) +{ + request_queue_t *q = rq->q; + if (q && q->can_throttle) { + atomic_sub(count, &q->nr_sectors); + + smp_mb(); + if (q->rq.count >= q->batch_requests && !blk_oversized_queue_batch(q)) { + if (waitqueue_active(&q->wait_for_requests)) + wake_up(&q->wait_for_requests); + } + if (atomic_read(&q->nr_sectors) < 0) { + printk("nr_sectors is %d\n", atomic_read(&q->nr_sectors)); + BUG(); + } + } +} + +static inline unsigned int blksize_bits(unsigned int size) +{ + unsigned int bits = 8; + do { + bits++; + size >>= 1; + } 
while (size > 256); + return bits; +} + +static inline unsigned int block_size(kdev_t dev) +{ + int retval = BLOCK_SIZE; + int major = MAJOR(dev); + + if (blksize_size[major]) { + int minor = MINOR(dev); + if (blksize_size[major][minor]) + retval = blksize_size[major][minor]; + } + return retval; +} + +#endif -- 2.30.2